{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "01d70cf7",
   "metadata": {},
   "source": [
    "<a href=\"https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/examples/output_parsing/LangchainOutputParserDemo.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "9c48213d-6e6a-4c10-838a-2a7c710c3a05",
   "metadata": {},
   "source": [
    "# Langchain Output Parsing"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "6f4af25e",
   "metadata": {},
   "source": [
    "Download Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "04d0bb21",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install llama-index-llms-openai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b9635dc3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Will not apply HSTS. The HSTS database must be a regular and non-world-writable file.\n",
      "ERROR: could not open HSTS store at '/home/loganm/.wget-hsts'. HSTS will be disabled.\n",
      "--2023-12-11 10:24:04--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt\n",
      "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.108.133, ...\n",
      "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 75042 (73K) [text/plain]\n",
      "Saving to: ‘data/paul_graham/paul_graham_essay.txt’\n",
      "\n",
      "data/paul_graham/pa 100%[===================>]  73.28K  --.-KB/s    in 0.04s   \n",
      "\n",
      "2023-12-11 10:24:04 (1.74 MB/s) - ‘data/paul_graham/paul_graham_essay.txt’ saved [75042/75042]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!mkdir -p 'data/paul_graham/'\n",
    "!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "50d3b817-b70e-4667-be4f-d3a0fe4bd119",
   "metadata": {},
   "source": [
    "#### Load documents, build the VectorStoreIndex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "690a6918-7c75-4f95-9ccc-d2c4a1fe00d7",
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import sys\n",
    "\n",
    "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
    "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n",
    "\n",
    "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n",
    "from IPython.display import Markdown, display\n",
    "\n",
    "import os\n",
    "\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"sk-...\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "03d1691e-544b-454f-825b-5ee12f7faa8a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# load documents\n",
    "documents = SimpleDirectoryReader(\"./data/paul_graham/\").load_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ad144ee7-96da-4dd6-be00-fd6cf0c78e58",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
     ]
    }
   ],
   "source": [
    "index = VectorStoreIndex.from_documents(documents, chunk_size=512)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8b7d7c61-b5d7-4b8f-b90b-3ebee1103f27",
   "metadata": {},
   "source": [
    "#### Define Query + Langchain Output Parser"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6fb88295-0840-4e2d-b79b-def0b0a63a7f",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core.output_parsers import LangchainOutputParser\n",
    "from langchain.output_parsers import StructuredOutputParser, ResponseSchema"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bc25edf7-9343-4e82-a3f1-eec4281a9371",
   "metadata": {},
   "source": [
    "**Define custom QA and Refine Prompts**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a4b9201d-fe16-4cc0-8135-a08d9928625d",
   "metadata": {},
   "outputs": [],
   "source": [
    "response_schemas = [\n",
    "    ResponseSchema(\n",
    "        name=\"Education\",\n",
    "        description=(\n",
    "            \"Describes the author's educational experience/background.\"\n",
    "        ),\n",
    "    ),\n",
    "    ResponseSchema(\n",
    "        name=\"Work\",\n",
    "        description=\"Describes the author's work experience/background.\",\n",
    "    ),\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e73b87b8-90da-4ab8-9ff7-e40880277d9b",
   "metadata": {},
   "outputs": [],
   "source": [
    "lc_output_parser = StructuredOutputParser.from_response_schemas(\n",
    "    response_schemas\n",
    ")\n",
    "output_parser = LangchainOutputParser(lc_output_parser)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1ba18a80-35f4-4fd4-9b13-9f13f84db4fe",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Context information is below.\n",
      "---------------------\n",
      "{context_str}\n",
      "---------------------\n",
      "Given the context information and not prior knowledge, answer the query.\n",
      "Query: {query_str}\n",
      "Answer: \n",
      "\n",
      "The output should be a markdown code snippet formatted in the following schema, including the leading and trailing \"```json\" and \"```\":\n",
      "\n",
      "```json\n",
      "{{\n",
      "\t\"Education\": string  // Describes the author's educational experience/background.\n",
      "\t\"Work\": string  // Describes the author's work experience/background.\n",
      "}}\n",
      "```\n"
     ]
    }
   ],
   "source": [
    "from llama_index.core.prompts.default_prompts import (\n",
    "    DEFAULT_TEXT_QA_PROMPT_TMPL,\n",
    ")\n",
    "\n",
    "# take a look at the new QA template!\n",
    "fmt_qa_tmpl = output_parser.format(DEFAULT_TEXT_QA_PROMPT_TMPL)\n",
    "print(fmt_qa_tmpl)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b6caf93b-6345-4c65-a346-a95b0f1746c4",
   "metadata": {},
   "source": [
    "#### Query Index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fb9cdf43-0f31-4c36-869b-df9fa50aebdb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    }
   ],
   "source": [
    "from llama_index.llms.openai import OpenAI\n",
    "\n",
    "llm = OpenAI(output_parser=output_parser)\n",
    "\n",
    "query_engine = index.as_query_engine(\n",
    "    llm=llm,\n",
    ")\n",
    "response = query_engine.query(\n",
    "    \"What are a few things the author did growing up?\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bc7760b6-5be3-4303-b97e-3f5edacf674b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'Education': 'The author did not plan to study programming in college, but initially planned to study philosophy.', 'Work': 'Growing up, the author worked on writing short stories and programming. They wrote simple games, a program to predict rocket heights, and a word processor.'}\n"
     ]
    }
   ],
   "source": [
    "print(response)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llama-index-4a-wkI5X-py3.11",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
